{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Simple Linear Regression for stock using scikit-learn\n"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import math\n",
        "import seaborn as sns\n",
        "%matplotlib inline\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "import yfinance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:45.342Z",
          "iopub.execute_input": "2021-10-16T01:40:45.346Z",
          "iopub.status.idle": "2021-10-16T01:40:45.994Z",
          "shell.execute_reply": "2021-10-16T01:40:45.988Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "stock = 'AAPL'\n",
        "start = '2016-01-01' \n",
        "end = '2018-01-01'\n",
        "data = yf.download(stock, start, end)\n",
        "data.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "                 Open       High        Low      Close  Adj Close     Volume\nDate                                                                        \n2016-01-04  25.652500  26.342501  25.500000  26.337500  24.286827  270597600\n2016-01-05  26.437500  26.462500  25.602501  25.677500  23.678221  223164000\n2016-01-06  25.139999  25.592501  24.967501  25.174999  23.214844  273829600\n2016-01-07  24.670000  25.032499  24.107500  24.112499  22.235073  324377600\n2016-01-08  24.637501  24.777500  24.190001  24.240000  22.352646  283192000",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2016-01-04</th>\n      <td>25.652500</td>\n      <td>26.342501</td>\n      <td>25.500000</td>\n      <td>26.337500</td>\n      <td>24.286827</td>\n      <td>270597600</td>\n    </tr>\n    <tr>\n      <th>2016-01-05</th>\n      <td>26.437500</td>\n      <td>26.462500</td>\n      <td>25.602501</td>\n      <td>25.677500</td>\n      <td>23.678221</td>\n      <td>223164000</td>\n    </tr>\n    <tr>\n      <th>2016-01-06</th>\n      <td>25.139999</td>\n      <td>25.592501</td>\n      <td>24.967501</td>\n      <td>25.174999</td>\n      <td>23.214844</td>\n      <td>273829600</td>\n    </tr>\n    <tr>\n      <th>2016-01-07</th>\n      <td>24.670000</td>\n      <td>25.032499</td>\n      <td>24.107500</td>\n      <td>24.112499</td>\n      <td>22.235073</td>\n      <td>324377600</td>\n    </tr>\n    <tr>\n      <th>2016-01-08</th>\n      <td>24.637501</td>\n      <td>24.777500</td>\n      <td>24.190001</td>\n      <td>24.240000</td>\n      <td>22.352646</td>\n      <td>283192000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:46.000Z",
          "iopub.execute_input": "2021-10-16T01:40:46.003Z",
          "iopub.status.idle": "2021-10-16T01:40:46.872Z",
          "shell.execute_reply": "2021-10-16T01:40:46.921Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df = data.reset_index()\n",
        "df.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/plain": "        Date       Open       High        Low      Close  Adj Close     Volume\n0 2016-01-04  25.652500  26.342501  25.500000  26.337500  24.286827  270597600\n1 2016-01-05  26.437500  26.462500  25.602501  25.677500  23.678221  223164000\n2 2016-01-06  25.139999  25.592501  24.967501  25.174999  23.214844  273829600\n3 2016-01-07  24.670000  25.032499  24.107500  24.112499  22.235073  324377600\n4 2016-01-08  24.637501  24.777500  24.190001  24.240000  22.352646  283192000",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Date</th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2016-01-04</td>\n      <td>25.652500</td>\n      <td>26.342501</td>\n      <td>25.500000</td>\n      <td>26.337500</td>\n      <td>24.286827</td>\n      <td>270597600</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2016-01-05</td>\n      <td>26.437500</td>\n      <td>26.462500</td>\n      <td>25.602501</td>\n      <td>25.677500</td>\n      <td>23.678221</td>\n      <td>223164000</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2016-01-06</td>\n      <td>25.139999</td>\n      <td>25.592501</td>\n      <td>24.967501</td>\n      <td>25.174999</td>\n      <td>23.214844</td>\n      <td>273829600</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2016-01-07</td>\n      <td>24.670000</td>\n      <td>25.032499</td>\n      <td>24.107500</td>\n      <td>24.112499</td>\n      <td>22.235073</td>\n      <td>324377600</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2016-01-08</td>\n      <td>24.637501</td>\n      <td>24.777500</td>\n      <td>24.190001</td>\n      <td>24.240000</td>\n      <td>22.352646</td>\n      <td>283192000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:46.877Z",
          "iopub.execute_input": "2021-10-16T01:40:46.881Z",
          "iopub.status.idle": "2021-10-16T01:40:46.890Z",
          "shell.execute_reply": "2021-10-16T01:40:46.924Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = df.drop(['Date','Close'], axis=1)\n",
        "y = df['Adj Close']"
      ],
      "outputs": [],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:46.897Z",
          "iopub.execute_input": "2021-10-16T01:40:46.902Z",
          "iopub.status.idle": "2021-10-16T01:40:46.909Z",
          "shell.execute_reply": "2021-10-16T01:40:46.926Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "# Split X and y into X_\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, \n",
        "                                   test_size=0.25, random_state=0)"
      ],
      "outputs": [],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:46.914Z",
          "iopub.execute_input": "2021-10-16T01:40:46.917Z",
          "shell.execute_reply": "2021-10-16T01:40:47.403Z",
          "iopub.status.idle": "2021-10-16T01:40:47.412Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.linear_model import LinearRegression\n",
        "\n",
        "regression_model = LinearRegression()\n",
        "regression_model.fit(X_train, y_train)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 6,
          "data": {
            "text/plain": "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n         normalize=False)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:47.419Z",
          "iopub.execute_input": "2021-10-16T01:40:47.424Z",
          "iopub.status.idle": "2021-10-16T01:40:47.456Z",
          "shell.execute_reply": "2021-10-16T01:40:47.451Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "intercept = regression_model.intercept_\n",
        "\n",
        "print(\"The intercept for our model is {}\".format(intercept))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The intercept for our model is -1.659117287999834e-12\n"
          ]
        }
      ],
      "execution_count": 7,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:47.462Z",
          "iopub.execute_input": "2021-10-16T01:40:47.465Z",
          "iopub.status.idle": "2021-10-16T01:40:47.474Z",
          "shell.execute_reply": "2021-10-16T01:40:47.552Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "regression_model.score(X_test, y_test)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 8,
          "data": {
            "text/plain": "1.0"
          },
          "metadata": {}
        }
      ],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:47.479Z",
          "iopub.execute_input": "2021-10-16T01:40:47.483Z",
          "iopub.status.idle": "2021-10-16T01:40:47.490Z",
          "shell.execute_reply": "2021-10-16T01:40:47.555Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import mean_squared_error\n",
        "\n",
        "y_predict = regression_model.predict(X_test)\n",
        "\n",
        "regression_model_mse = mean_squared_error(y_predict, y_test)\n",
        "\n",
        "regression_model_mse"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "2.57258510115635e-25"
          },
          "metadata": {}
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:47.496Z",
          "iopub.execute_input": "2021-10-16T01:40:47.501Z",
          "iopub.status.idle": "2021-10-16T01:40:47.509Z",
          "shell.execute_reply": "2021-10-16T01:40:47.558Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "math.sqrt(regression_model_mse)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "5.07206575386829e-13"
          },
          "metadata": {}
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:47.515Z",
          "iopub.execute_input": "2021-10-16T01:40:47.518Z",
          "iopub.status.idle": "2021-10-16T01:40:47.526Z",
          "shell.execute_reply": "2021-10-16T01:40:47.560Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input the latest Open, High, Low, Close, Volume\n",
        "# predicts the next day price\n",
        "regression_model.predict([[167.81, 171.75, 165.19, 166.48, 37232900]])"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 11,
          "data": {
            "text/plain": "array([166.48])"
          },
          "metadata": {}
        }
      ],
      "execution_count": 11,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-10-16T01:40:47.531Z",
          "iopub.execute_input": "2021-10-16T01:40:47.534Z",
          "iopub.status.idle": "2021-10-16T01:40:47.542Z",
          "shell.execute_reply": "2021-10-16T01:40:47.562Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.13",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "nteract": {
      "version": "0.28.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}